# Fit a linear model
lm_mod <- lm(accel ~ times, data = mcycle)
# Visualize the model
termplot(lm_mod, partial.resid = TRUE, se = TRUE)## Loading required package: nlme
## Warning: package 'nlme' was built under R version 3.6.2
## This is mgcv 1.8-31. For overview type 'help("mgcv-package")'.
# Fit the model
gam_mod <- gam(accel ~ s(times), data = mcycle)
# Plot the results
plot(gam_mod, residuals = TRUE, pch = 1)## (Intercept) s(times).1 s(times).2 s(times).3 s(times).4 s(times).5
## -25.545865 -63.718008 43.475644 -110.350132 -22.181006 35.034423
## s(times).6 s(times).7 s(times).8 s(times).9
## 93.176458 -9.283018 -111.661472 17.603782
The smooth is made up of 9 basis functions, each with their own coefficient. A GAM with just two variables can have many coefficients, which means they require a bit more data than linear models.
# Fit a GAM with 3 basis functions
gam_mod_k3 <- gam(accel ~ s(times, k = 3), data = mcycle)
# Fit with 20 basis functions
gam_mod_k20 <- gam(accel ~ s(times, k = 20), data = mcycle)
# Visualize the GAMs
par(mfrow = c(1, 2))
plot(gam_mod_k3, residuals = TRUE, pch = 1)
plot(gam_mod_k20, residuals = TRUE, pch = 1)## s(times)
## 0.0007758036
# Fix the smoothing parameter at 0.1
gam_mod_s1 <- gam(accel ~ s(times), data = mcycle, sp = 0.1)
# Fix the smoothing parameter at 0.0001
gam_mod_s2 <- gam(accel ~ s(times), data = mcycle, sp =0.0001)
# Plot both models
par(mfrow = c(2, 1))
plot(gam_mod_s1, residuals = TRUE, pch = 1)
plot(gam_mod_s2, residuals = TRUE, pch = 1)# Fit the GAM
gam_mod_sk <- gam(accel ~ s(times, k=50), data=mcycle, sp=0.0001)
# Visualize the model
plot(gam_mod_sk, residuals = TRUE, pch = 1)## 'data.frame': 205 obs. of 26 variables:
## $ symbol : int 3 3 1 2 2 2 1 1 1 0 ...
## $ loss : int NA NA NA 164 164 NA 158 NA 158 NA ...
## $ make : Factor w/ 22 levels "alfa-romero",..: 1 1 1 2 2 2 2 2 2 2 ...
## $ fuel : Factor w/ 2 levels "diesel","gas": 2 2 2 2 2 2 2 2 2 2 ...
## $ aspir : Factor w/ 2 levels "std","turbo": 1 1 1 1 1 1 1 1 2 2 ...
## $ doors : Factor w/ 2 levels "four","two": 2 2 2 1 1 2 1 1 1 2 ...
## $ style : Factor w/ 5 levels "convertible",..: 1 1 3 4 4 4 4 5 4 3 ...
## $ drive : Factor w/ 3 levels "4wd","fwd","rwd": 3 3 3 2 1 2 2 2 2 1 ...
## $ eng.loc : Factor w/ 2 levels "front","rear": 1 1 1 1 1 1 1 1 1 1 ...
## $ wb : num 88.6 88.6 94.5 99.8 99.4 ...
## $ length : num 169 169 171 177 177 ...
## $ width : num 64.1 64.1 65.5 66.2 66.4 66.3 71.4 71.4 71.4 67.9 ...
## $ height : num 48.8 48.8 52.4 54.3 54.3 53.1 55.7 55.7 55.9 52 ...
## $ weight : int 2548 2548 2823 2337 2824 2507 2844 2954 3086 3053 ...
## $ eng.type : Factor w/ 7 levels "dohc","dohcv",..: 1 1 6 4 4 4 4 4 4 4 ...
## $ cylinders : Factor w/ 7 levels "eight","five",..: 3 3 4 3 2 2 2 2 2 2 ...
## $ eng.cc : int 130 130 152 109 136 136 136 136 131 131 ...
## $ fuel.sys : Factor w/ 8 levels "1bbl","2bbl",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ bore : num 3.47 3.47 2.68 3.19 3.19 3.19 3.19 3.19 3.13 3.13 ...
## $ stroke : num 2.68 2.68 3.47 3.4 3.4 3.4 3.4 3.4 3.4 3.4 ...
## $ comp.ratio: num 9 9 9 10 8 8.5 8.5 8.5 8.3 7 ...
## $ hp : int 111 111 154 102 115 110 110 110 140 160 ...
## $ rpm : int 5000 5000 5000 5500 5500 5500 5500 5500 5500 5500 ...
## $ city.mpg : int 21 21 19 24 18 19 19 19 17 16 ...
## $ hw.mpg : int 27 27 26 30 22 25 25 25 20 22 ...
## $ price : int 13495 16500 16500 13950 17450 15250 17710 18920 23875 NA ...
# Fit the model
mod_city <- gam(city.mpg ~ s(weight) + s(length) + s(price),
data = mpg, method = "REML")
# Plot the model
plot(mod_city, pages = 1)mod_city2 <- gam(city.mpg ~ s(weight) + s(length) + s(price) + fuel + drive + style,
data = mpg, method = "REML")
# Plot the model
plot(mod_city2, all.terms = TRUE, pages = 1)# Fit the model
mod_city3 <- gam(city.mpg ~ s(weight, by=drive) + s(length, by=drive) + s(price, by=drive) + drive,
data = mpg, method = "REML")
# Plot the model
plot(mod_city3, pages = 1)